<?php
/**
 * Crawler script: walks the paginated article lists of one 51cto column,
 * downloads every linked article (following its in-article pager) and stores
 * the title/metadata in tbl_tech and the body in tbl_tech_content.
 *
 * Pages are fetched as gb2312 and converted to utf-8 before being stored.
 * Anything that cannot be fetched, parsed or converted is skipped with a
 * warning instead of being inserted as an empty record.
 */
set_time_limit(0);
include_once('../common.inc.php');
include_once(_INC_PATH.'database.php');

$url = 'http://developer.51cto.com/col/1441/';
$maxpage = 2;
$db = new database();

// The article body is delimited the same way on the first page of an article
// and on every continuation page, so the pattern is shared.
$contentPattern = '#<div id="content">(.*)<div class="tips">#is';

for($i=1; $i<=$maxpage; $i++)
{
	$tempUrl = $url."list_1441_$i.htm";
	$listHtml = file_get_contents($tempUrl);
	if($listHtml === false){
		// file_get_contents() has already raised a warning; try the next list page.
		continue;
	}

	// preg_match_all() returns 0 (or false on error) when nothing matched.
	if(!preg_match_all('#<div class=\'m_list\'>(.*)</div>#isU',$listHtml,$matches)){
		continue;
	}

	foreach($matches[1] as $value)
	{
		if(!preg_match_all('#<a href="(.*)">#isU',$value,$hrefMatches)){
			continue;
		}

		foreach($hrefMatches[1] as $techUrl)
		{
			$techHtml = file_get_contents($techUrl);
			if($techHtml === false){
				continue;
			}

			// An article without a title or a body is useless; do not insert it.
			if(!preg_match('#<h1>(.*)</h1>#is',$techHtml,$titleMatches)
				|| !preg_match($contentPattern,$techHtml,$contentMatches)){
				trigger_error("Skipping $techUrl: title or content block not found", E_USER_WARNING);
				continue;
			}
			$title = $titleMatches[1];
			$content = $contentMatches[1];

			// Publication time as printed on the page; fall back to the current
			// minute when it is missing or not a valid date.
			$timestamp = false;
			if(preg_match('#<div class="msg">(.*)(\d{4}-\d{2}-\d{2} \d{2}:\d{2})(.*)class="f12-a"#isU',$techHtml,$timeMatches)){
				$timestamp = strtotime($timeMatches[2]);
			}
			if($timestamp === false){
				$timestamp = strtotime(date("Y-m-d H:i"));
			}

			// Multi-page articles: append the body of every page linked from the pager.
			if(preg_match('#<div class="page">(.*)</div>#isU',$techHtml,$pageMatches)
				&& preg_match_all('#<a href=\'(.*)\'#isU',$pageMatches[1],$pageLinkMatches))
			{
				// Pager links are resolved against the directory of the article URL.
				$baseUrl = dirname($techUrl)."/";
				foreach(array_unique($pageLinkMatches[1]) as $nextUrl)
				{
					$nextHtml = file_get_contents($baseUrl.$nextUrl);
					if($nextHtml === false){
						continue;
					}
					if(preg_match($contentPattern,$nextHtml,$nextMatches)){
						$content .= $nextMatches[1];
					}
				}
			}

			// Convert both fields before touching the database so a failed
			// conversion cannot leave a tbl_tech row without its content row.
			$utf8Title = iconv("gb2312","utf-8",$title);
			$utf8Content = iconv("gb2312","utf-8",$content);
			if($utf8Title === false || $utf8Content === false){
				trigger_error("Skipping $techUrl: gb2312 to utf-8 conversion failed", E_USER_WARNING);
				continue;
			}

			$temprow = array();
			$temprow['title'] = $utf8Title;
			$temprow['authorId'] = 'admin';
			$temprow['authorName'] = '百企编辑部';
			$temprow['addTime'] = $timestamp;
			$temprow['mTime'] = $timestamp;
			$temprow['published'] = '0';

			$techId = $db->insert($temprow,'tbl_tech');
			if($techId > 0){
				$contentrow = array();
				$contentrow['techId'] = $techId;
				// NOTE(review): mysql_escape_string() was removed in PHP 7.0, and only
				// the content is escaped here while the title is not. Confirm whether
				// database::insert() escapes values itself before changing this.
				$contentrow['content'] = mysql_escape_string($utf8Content);
				$db->insert($contentrow,'tbl_tech_content');
			}
		}
	}
}
echo "finish";
?>